*===================================================================*
*   BIHAR EVALUATION OF SOCIAL FRANCHISING AND TELEMEDICINE (BEST)
*                      STANDARDIZED PATIENTS
* Creates stata files (dta) for Diarrhea and Pneumonia using the raw
* txt files
* Some minor cleaning - SECOND ROUND DATA
*
*====================================================================*

/* NOTES
(1) This do-file creates the dta files for Diarrhea and Pneumonia from the raw txt files.
(2) Stata automatically reads row 1 as variable names, but it treats row 2 (the variable labels) as data.
(3) Read the txt file READ_ME to understand how the txt files were created.
(4) Does basic cleaning of the drug variables and creates a file for the drug list. */


*====================================================================*
* SP Data Diarrhea (txt file)
*====================================================================*

* Open the raw diarrhea SP file (205 observations expected).
* insheet takes row 1 of the txt file as variable names, so the row
* holding the variable labels arrives as observation 1.
  insheet using "$rawdatap2\Sp_diarrhea_data_Best_2014_0924.txt", names clear

* Label every variable with the text stored in observation 1 (row 2 of
* the txt file). Compound double quotes are used so that a label which
* itself contains a double quote does not break the label command.
  foreach var of varlist * {
    local varlab = `var'[1]
    label variable `var' `"`varlab'"'
    }

* Drop the label row, then verify the sample size: exactly 205
* observations in memory, all of them distinct. Checking _N as well as
* the number of unique rows means a duplicated extra row cannot slip
* through with 205 unique values.
  drop in 1
  assert _N == 205
  duplicates report
  assert r(unique_value) == 205

* Sample indicator for the diarrhea SP data
/*s2: statement correct*/
* sample2 is set to 2 when any of the following holds, and to 1 otherwise:
*   - completed equals 2
*   - completed equals 1 and reason is not empty
*   - s2 equals 2 or is missing
  destring completed s2, replace
  generate sample2 = cond(completed==2 | (completed==1 & reason!="") | s2==2 | s2==., 2, 1)
  label variable sample2 "Sample SP - Diarrhea"
  order sample2, first
  tabulate sample2

* Harmonize the provider identification variables
* Record-level fixes to geographic codes and names, matched on the raw
* provider code qq7 (still text at this point)
  replace qq3a = "241"       if qq7=="70029"  & qq3a=="239"
  replace qq2  = "MANSI"     if inlist(qq7, "211052", "211053") & qq2=="KHAGARIYA"
  replace qq2  = "MOHANPUR"  if qq7=="285042" & qq2=="MOHUDDINAGAR"
  replace qq1  = "BEGUSARAI" if qq7=="295010" & qq1=="SAMASTIPUR"
  replace qq1  = "SITAMARHI" if inlist(qq7, "356018", "356047") & qq1=="MUZAFFARPUR"

* qq1-qq4 hold the names of the four geographic levels; qq1a-qq4a hold
* the matching codes. Names are renamed in place; codes are converted to
* new numeric variables (created in the order cluster, village, block,
* district) and the text versions are dropped.
  local geo district block village cluster
  forvalues i = 1/4 {
    local level : word `i' of `geo'
    rename qq`i' `level'_name
    }
  forvalues i = 4(-1)1 {
    local level : word `i' of `geo'
    destring qq`i'a, generate(`level')
    drop qq`i'a
    }

/*NOTE: These corrections are based on Checks_ID_19_03_2015_updated by Bhartendu*/
* Replace non-numeric or mistyped provider codes with the correct code
  replace qq7 = "397042" if qq7=="BIH 002"
  replace qq7 = "207055" if qq7=="jai01"
  replace qq7 = "321045" if qq7=="sak01"
  replace qq7 = "382038" if qq7=="328038"
  replace qq7 = "353037" if qq7=="353937"

* Numeric provider identifier built from the corrected code
  destring qq7, generate(prov_id)

* Destring/tostring
* Convert every variable that contains only numbers to numeric storage.
  destring,  replace

* The medicine variables for slots 5 and 6 are turned back into text so
* that the string cleaning below can run on them. tostring writes a
* numeric missing value as ".", so that placeholder is blanked out for
* the a and the b variables alike (previously only the a variables were
* cleaned, leaving "." in t1_5b and t1_6b).
  tostring t1_5a t1_6a t1_5b t1_6b, replace
  foreach v of varlist t1_5a t1_6a t1_5b t1_6b {
    replace `v' = "" if `v' == "."
    }

* Basic med corrections, applied to each of the six medicine slots
  forvalues n=1/6 {
    * -99 in the a variable is treated as no entry
    replace t1_`n'a="" if t1_`n'a=="-99"
    * Upper-case the unit first so the patterns below match either case
    replace t1_`n'b=subinstr(t1_`n'b, "ml","ML",.)
    * NOTE(review): the next two substitutions rewrite the value itself
    * (6ML to 60ML, 20 MG to 20ML) and match anywhere in the text;
    * presumably corrections for known entry errors - confirm
    replace t1_`n'b=subinstr(t1_`n'b, "6ML","60ML",.)
    replace t1_`n'b=subinstr(t1_`n'b, "20 MG","20ML",.)
    * Remove the space between the number and the unit
    replace t1_`n'b=subinstr(t1_`n'b, "30 ML","30ML",.)
    replace t1_`n'b=subinstr(t1_`n'b, "50 ML","50ML",.)
    replace t1_`n'b=subinstr(t1_`n'b, "60 ML","60ML",.)
    }

* Disease identifier
* The text variable case is replaced by a numeric variable of the same
* name carrying the value label case, and moved to the front of the data.
* NOTE(review): encode numbers the text values it finds in alphabetical
* order, and the label is then redefined as 1 Diarrhea / 2 Pneumonia;
* confirm the raw text in this file maps to code 1.
  rename case case_or
  encode case_or, generate(case)
  drop case_or
  label define case 1 "Diarrhea" 2 "Pneumonia", replace
  label values case case
  label variable case "Type of disease"
  order case, first

* Reduce storage types where possible and write the diarrhea file
  compress
  save "$prodata2\standardized_patient_diarrhea_a", replace





*====================================================================*
* SP Data Pneumonia (txt file)
*====================================================================*
  * Open the raw pneumonia SP file (205 observations expected).
  * insheet takes row 1 of the txt file as variable names, so the row
  * holding the variable labels arrives as observation 1.
  insheet using "$rawdatap2\Sp_pneumonia_data_Best_2014_0924.txt", names clear

* Label every variable with the text stored in observation 1 (row 2 of
* the txt file). Compound double quotes are used so that a label which
* itself contains a double quote does not break the label command.
  foreach var of varlist * {
    local varlab = `var'[1]
    label variable `var' `"`varlab'"'
    }

* Drop the label row, then verify the sample size: exactly 205
* observations in memory, all of them distinct. Checking _N as well as
* the number of unique rows means a duplicated extra row cannot slip
* through with 205 unique values.
  drop in 1
  assert _N == 205
  duplicates report
  assert r(unique_value) == 205

* Sample indicator for the pneumonia SP data
* sample3 is set to 2 when any of the following holds, and to 1 otherwise:
*   - completed equals 2
*   - reason is not empty
*   - s2 equals 2 or is missing
* NOTE(review): the diarrhea section applies the reason rule only when
* completed equals 1; confirm that the difference is intended.
  destring completed s2, replace
  generate sample3 = cond(completed==2 | reason!="" | s2==2 | s2==., 2, 1)
  label variable sample3 "Sample SP - Pneumonia"
  order sample3, first
  tabulate sample3


* Harmonize the provider identification variables
* Record-level fixes to geographic codes and names, matched on the raw
* provider code qq7 (still text at this point)
  replace qq3a = "871"      if qq7=="242041" & qq3a=="870"
  replace qq2a = "134"      if inlist(qq7, "285047", "285048") & qq2a=="153"
  replace qq2  = "MOHANPUR" if inlist(qq7, "285047", "285048")
  replace qq3a = "1521"     if qq7=="353038" & qq3a=="1522"

* Block-name spellings, matched on the district code and block code
  replace qq2 = "MANSURCHAK" if qq1a=="1" & qq2a=="15"  & qq2=="MASURCHAK"
  replace qq2 = "BAKHRI"     if qq1a=="1" & qq2a=="17"  & qq2=="BAKHARI"
  replace qq2 = "ROSERA"     if qq1a=="6" & qq2a=="142" & qq2=="Rosera"
  replace qq2 = "CHANPATIA"  if qq1a=="8" & qq2a=="67"

* qq1-qq4 hold the names of the four geographic levels; qq1a-qq4a hold
* the matching codes. Names are renamed in place; codes are converted to
* new numeric variables (created in the order cluster, village, block,
* district) and the text versions are dropped.
  local geo district block village cluster
  forvalues i = 1/4 {
    local level : word `i' of `geo'
    rename qq`i' `level'_name
    }
  forvalues i = 4(-1)1 {
    local level : word `i' of `geo'
    destring qq`i'a, generate(`level')
    drop qq`i'a
    }

/*NOTE: These corrections are based on Checks_ID_19_03_2015_updated by Bhartendu*/
* Replace non-numeric or mistyped provider codes with the correct code
  replace qq7 = "348050" if qq7=="DAN 01"
  replace qq7 = "117040" if qq7=="KHA-012"
  replace qq7 = "87051"  if qq7=="Tet001"
  replace qq7 = "6055"   if qq7=="KRO-03"
  replace qq7 = "95062"  if qq7=="SHI001"
  replace qq7 = "318039" if qq7=="kor02"
  replace qq7 = "103036" if qq7=="TUR-01"
  replace qq7 = "70022"  if qq7=="7022"

* Numeric provider identifier built from the corrected code
  destring qq7, generate(prov_id)

* Destring/tostring
* Convert every variable that contains only numbers to numeric storage.
  destring, replace

* Basic med corrections, applied to each of the six medicine slots
  forvalues n=1/6 {
    * destring leaves a medicine column numeric when it holds no text
    * (for example an unused slot). The string edits below would then
    * stop with a type mismatch, so such a column is turned back into
    * text and the "." that tostring writes for missing is blanked.
    * Columns that are already text are left untouched.
    foreach v of varlist t1_`n'a t1_`n'b {
      capture confirm numeric variable `v'
      if _rc == 0 {
        tostring `v', replace
        replace `v' = "" if `v' == "."
        }
      }
    * Show the distribution of the a variable before cleaning
    tab t1_`n'a
    * -99 in the a variable is treated as no entry
    replace t1_`n'a="" if t1_`n'a=="-99"
    * Upper-case the unit first so the patterns below match either case
    replace t1_`n'b=subinstr(t1_`n'b, "ml","ML",.)
    * NOTE(review): the next three substitutions rewrite the value itself
    * (6ML to 60ML, 0.50MG to 0.5MG, 20 MG to 20ML) and match anywhere in
    * the text; presumably corrections for known entry errors - confirm
    replace t1_`n'b=subinstr(t1_`n'b, "6ML","60ML",.)
    replace t1_`n'b=subinstr(t1_`n'b, "0.50MG","0.5MG",.)
    replace t1_`n'b=subinstr(t1_`n'b, "20 MG","20ML",.)
    * Remove the space between the number and the unit
    replace t1_`n'b=subinstr(t1_`n'b, "30 ML","30ML",.)
    replace t1_`n'b=subinstr(t1_`n'b, "50 ML","50ML",.)
    replace t1_`n'b=subinstr(t1_`n'b, "60 ML","60ML",.)
    replace t1_`n'b=subinstr(t1_`n'b, "100 ML","100ML",.)
    }


* Identify disease
* The case variable read from the txt file is discarded and rebuilt as
* the constant 2, the code labelled Pneumonia below.
  drop case
  gen     case = 2
  lab var case "Type of disease"
  order case, first
* Define the value label here rather than relying on a label left in
* memory by the diarrhea section; without a definition the saved file
* would carry case with a label name attached but no label text.
  lab def case 1 "Diarrhea" 2 "Pneumonia", replace
  lab val case case

* Save file
* Reduce storage types where possible and write the pneumonia file
  compress
  save "$prodata2\standardized_patient_pneumonia_a", replace
